package com.shujia.spark.core

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Word-count job written against the Spark RDD API.
  *
  * Reads the text file `/data/wordcount.txt`, splits every line on commas,
  * counts how many times each token occurs and writes one
  * `word<TAB>count` line per distinct token to the directory `/data/wc`.
  *
  * No master URL is set on the `SparkConf` here, so it has to be supplied
  * from outside the program (for example by `spark-submit --master ...`).
  */
object Demo20Submit {

  /**
    * Entry point of the job.
    *
    * @param args command-line arguments; not used by this job
    */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf()
      .setAppName("wc")

    val sc = new SparkContext(conf)

    // Always release the SparkContext, even when the job fails part-way.
    try {
      // 1. Read the input file, one RDD element per line.
      val linesRDD: RDD[String] = sc.textFile("/data/wordcount.txt")

      // 2. Split each line into words (comma separated).
      val wordsRDD: RDD[String] = linesRDD.flatMap(line => line.split(","))

      // 3. Pair every word with an initial count of 1.
      val pairsRDD: RDD[(String, Int)] = wordsRDD.map(word => (word, 1))

      // 4. Sum the counts per word. reduceByKey merges partial sums inside each
      //    partition before the shuffle, so it moves far less data than grouping
      //    all occurrences of a word together and then taking the group size.
      val countRDD: RDD[(String, Int)] = pairsRDD.reduceByKey(_ + _)

      // 5. Format each result as "word<TAB>count".
      val resultRDD: RDD[String] = countRDD.map {
        case (word, count) => s"$word\t$count"
      }

      // 6. Save the result as text files under the output directory.
      //    NOTE(review): with the default Hadoop output settings this is expected
      //    to fail if /data/wc already exists - confirm for the target cluster.
      resultRDD.saveAsTextFile("/data/wc")
    } finally {
      sc.stop()
    }

  }

}
